## 

rm(list = ls())

## 

library(rdrobust)
library(tidyverse)
library(pbapply)

## Declare covars

covs <- c('turnout_party_2009_btw', 'soz_vers_beschaeftigte_share',
          'pop_density_km2', 'migration_out_share', 'refugees_2017')

## Get federal election data

bt <- readRDS("data/data_federal.rds") %>%
  filter(!is.na(treated))

## List of outcomes

outcomes <- c('turnout_party',
              'agg_left_party', 
              'agg_center_party', 
              'agg_right_party')

## Source helper function to tidy rdrobust output

source("code/tidy_rd.R")

## First differences

diff_df <- pblapply(outcomes, function(o) {
  out <- bt %>%
    filter(year > 2012) %>%
    pivot_wider(values_from = o, names_from = 'year', id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2017  - o2013) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(bt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1)

## Estimate main effect w/ optimal BWs

out_rd_opt <- pblapply(outcomes, function(o) {
  
  ## Iterate over whether to exclude B-W
  
  lapply(c(T,F), function(exclude_state) {
    
    if (exclude_state) {
      subset_select <- !diff_df$state_id == '08' 
    } else {
      subset_select <- rep(T, nrow(diff_df))
    }
    
    ## Estimate
    
    out <- rdrobust(y = diff_df[subset_select, o] %>% pull(!!o), 
                    x = diff_df$runvar[subset_select], c = 0,
                    covs = diff_df[subset_select, covs])
    
    ## Tidy and return
    
    out2 <- out %>% 
      tidy_rd(se_nr = 3) %>% 
      mutate(outcome = o,
             exclude_state = ifelse(exclude_state, 
                                    'State excluded', 'All states')) %>% 
      dplyr::rename(se = std.error, pval = p.value, 
                    bw = bw_left_h,
                    bw_bias = bw_left_b) %>% 
      mutate(conf.low90 = estimate - qnorm(0.95)*se,
             conf.high90 = estimate + qnorm(0.95)*se)
    
    out2
  }) %>% reduce(rbind)
}) %>%
  reduce(rbind)

## Rename

rd_federal <- out_rd_opt

## Remove everything but rd results and covariates

rm(list = ls()[!ls() %in% c("rd_federal", "covs", "tidy_rd")])

## State elections ##

## Get state election date ##

lt <- readRDS('data/data_state.rds') %>%
  filter(!is.na(treated)) %>%
  mutate(year = lubridate::year(date)) %>%
  mutate(state_id = substr(ags, 1, 2))

## Get list of states to use

state_list <- read_rds("data/states_census.rds") %>%
  dplyr::select(applies_census, state_id, census_first_year)

## Merge to main 

lt <- left_join(lt, state_list)

## List of outcomes

outcomes <- c('agg_left', 'agg_right', 'agg_center', 'incumbent')

## Convert years to periods rel. to treatment

table(lt$state_id, lt$year)
lt <- lt %>%
  mutate(time_rel = year - census_first_year)

## This converts each election year to time period relative to treatment
## Errors can be ignored

periods_by_state <- lt %>%
  group_by(state_id) %>%
  summarise(l = list(unique(time_rel))) %>%
  unnest(cols = c(l))
periods_by_state <- periods_by_state %>%
  group_by(state_id) %>%
  summarise(l_r = list(rank(l))) %>%
  unnest(cols = c(l_r)) %>% dplyr::select(-state_id) %>% 
  bind_cols(periods_by_state, .)
periods_by_state <- periods_by_state %>% 
  group_by(state_id) %>%
  mutate(l_r = l_r - max(l_r)) %>%
  filter(!is.na(l)) %>%
  rename(time_rel_period = l_r,
         time_rel = l)

## Merge this back to the main DF

lt <- lt %>%
  left_join(periods_by_state) %>%
  mutate(post = ifelse(time_rel_period > -1, 1, 0),
         treated_post = post * treated)

lt %>% 
  distinct(land,year, census_first_year, post)

## Drop period t-2 (i.e. we only want two periods)

lt <- lt %>% 
  filter(time_rel_period > -2)

## Get first differences

diff_df <- pblapply(outcomes, function(o) {
  out <- lt %>%
    filter(time_rel_period > -2) %>%
    mutate(time_rel_period = time_rel_period + 2)  %>% 
    pivot_wider(values_from = o, names_from = 'time_rel_period', 
                id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2  - o1) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(lt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs)) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1)

# Exclude S-H

diff_df <- diff_df %>%
  filter(!str_sub(ags, 1, 2) == '01')

## Estimate
## This loops over outcomes

out_rd_opt <- pblapply(outcomes, function(o) {
  
  ## Iterate over whether or not to exclude B-W
  
  lapply(c(T,F), function(exclude_state) {
    
    if (exclude_state) { 
      subset_select <- !(substr(diff_df$ags, 1, 2) == '08') 
    } else {
      subset_select <- rep(T, nrow(diff_df))
    }
    
    ## Estimate
    
    out <- rdrobust(y = diff_df[subset_select, o] %>% pull(!!o), 
                    x = diff_df$runvar[subset_select], c = 0,
                    covs = diff_df[subset_select, covs])
    
    ## Return this : Robust B-C SE
    out2 <- out %>% 
      tidy_rd(se_nr = 3) %>% 
      mutate(outcome = o, 
             exclude_state = ifelse(exclude_state, 
                                    'State excluded', 'All states')) %>% 
      dplyr::rename(se = std.error, pval = p.value, 
                    bw = bw_left_h,
                    bw_bias = bw_left_b) %>% 
      mutate(conf.low90 = estimate - qnorm(0.95)*se,
             conf.high90 = estimate + qnorm(0.95)*se)
    
    ## Return
    
    out2
    
  }) %>% reduce(rbind) 
}) %>%
  reduce(rbind)

rd_state <- out_rd_opt

## Remove everything except covariates and RD results ##

rm(list = ls()[!ls() %in% c("rd_state", "rd_federal", "covs", "tidy_rd")])

## Get municipal election data

mu <- readRDS('data/data_municipal.rds') %>%
  filter(!is.na(treated)) %>%
  mutate(state_id = substr(ags, 1, 2))

## Drop period t-2 (i.e. we only want two periods)

mu <- mu %>% 
  filter(time_rel_period > -2)

## Declare outcomes to use

outcomes <- c('agg_left', 'agg_right', 'agg_center')

## Convert to first differences

diff_df <- pblapply(outcomes, function(o) {
  out <- mu %>%
    filter(time_rel_period > -2) %>%
    mutate(time_rel_period = time_rel_period + 2)  %>% 
    pivot_wider(values_from = o, names_from = 'time_rel_period', 
                id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2  - o1) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(mu %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1)

## Estimate
## This loops over outcomes

out_rd_opt <- pblapply(outcomes, function(o) {
  
  ## Iterate over whether or not to exclude B-W
  
  lapply(c(T,F), function(exclude_state) {
    
    if (exclude_state) {
      subset_select <- !diff_df$state_id == '08' 
    } else {
      subset_select <- rep(T, nrow(diff_df))
    }
    
    out <- rdrobust(y = diff_df[subset_select, o] %>% pull(!!o), 
                    x = diff_df$runvar[subset_select], c = 0,
                    covs = diff_df[subset_select, covs])
    
    ## Tidy and return
    
    out2 <- out %>% 
      tidy_rd(se_nr = 3) %>% 
      mutate(outcome = o, 
             exclude_state = ifelse(exclude_state, 
                                    'State excluded', 'All states')) %>% 
      dplyr::rename(se = std.error, pval = p.value, 
                    bw = bw_left_h,
                    bw_bias = bw_left_b) %>% 
      mutate(conf.low90 = estimate - qnorm(0.95)*se,
             conf.high90 = estimate + qnorm(0.95)*se)
    
    ## Return
    
    out2
  }) %>% reduce(rbind)
}) %>%
  reduce(rbind)

rd_municipal <- out_rd_opt

## Rm

rm(list = ls()[!str_detect(ls(), "rd_")])
rm(out_rd_opt)

# Table A.7 : controlling for refugee presence ----

rd_res <- bind_rows(rd_federal, rd_state, rd_municipal) %>% 
  filter(outcome %in% c("agg_left", "agg_left_party")) %>% 
  filter(exclude_state == "State excluded") %>% 
  dplyr::select(estimate, se, pval, bw, n) %>% 
  mutate(election = c("Federal", "State", "Municipal"))
rd_res

